HMM and masting
1 Load data
raw_data <- read.csv(file.path(wd, 'data', 'ebms', 'Beech_tree-ring_masting_data.csv'))
clim_data <- readRDS(file.path(wd, 'data', 'ebms', 'era5land_sitesextract.rds'))
raw_data$uniqueID <- paste0(raw_data$site.ID, "_", raw_data$tree.ID)
raw_data <- na.omit(raw_data[,c('site.ID', 'uniqueID', 'year', 'seeds')])
uniq_tree_ids <- unique(raw_data$uniqueID)
N_trees <- length(uniq_tree_ids)
N_sites <- length(unique(raw_data$site.ID))
first_year <- min(raw_data$year)
last_year <- max(raw_data$year)
max_years <- first_year:max(raw_data$year)
N_max_years <- length(max_years)We have 57 trees on 7 sites. Observations were collected from 1980 to 2022.
2 Format data
seed_counts <- c()
years <- c()
prevsummer_temps <-c()
spring_temps <-c()
gdd_lastfrost <-c()
N_years <- c()
idx <- 1
tree_start_idxs <- c()
tree_end_idxs <- c()
for (tid in uniq_tree_ids) {
raw_data_tree <- raw_data[raw_data$uniqueID == tid,]
years_tree <- raw_data_tree$year-first_year+1
years <- c(years, years_tree)
N_years_tree <- length(years_tree)
N_years <- c(N_years, N_years_tree)
seed_count_tree <- raw_data_tree$seeds
seed_counts <- c(seed_counts, seed_count_tree)
prevsummer_temps_tree <- clim_data[clim_data$site.ID == raw_data_tree$site.ID[1] & clim_data$year %in% (max_years-1), 'meantmax_ja'] # all years, even those unobserved
prevsummer_temps <- c(prevsummer_temps, prevsummer_temps_tree)
spring_temps_tree <- clim_data[clim_data$site.ID == raw_data_tree$site.ID[1] & clim_data$year %in% (max_years-1), 'meantmean_am'] # all years, even those unobserved
spring_temps <- c(spring_temps, spring_temps_tree)
gdd_lastfrost_tree <- clim_data[clim_data$site.ID == raw_data_tree$site.ID[1] & clim_data$year %in% (max_years-1), 'gdd_b5_tolastfrost']/10 # all years, even those unobserved, in *10degC
gdd_lastfrost <- c(gdd_lastfrost, gdd_lastfrost_tree)
tree_start_idxs <- c(tree_start_idxs, idx)
idx <- idx + N_years_tree
tree_end_idxs <- c(tree_end_idxs, idx - 1)
}3 Create some future climates
We can create some false future summer conditions, based on a pessimistic increase of 5°C in one century.
For spring frost risk and spring temperature, we may simply apply the observed trends.
4 Create new trees to predict
trees_per_stand <- 100
unique_stands <- "Benwell"
newtree_stand_idxs <- rep(which(unique_stands==unique_stands), each = trees_per_stand)
N_newtrees <- length(newtree_stand_idxs)
N_max_newyears <- length(years_to_predict)
first_newyear <- min(years_to_predict)
newyears <- c()
newprevsummer_temps <-c()
newspring_temps <-c()
newgdd_lastfrost <-c()
N_newyears <- c()
idx <- 1
newtree_start_idxs <- c()
newtree_end_idxs <- c()
for (i in 1:N_newtrees) {
newyears_tree <- years_to_predict-first_newyear+1
newyears <- c(newyears, newyears_tree)
N_newyears_tree <- length(newyears_tree)
N_newyears <- c(N_newyears, N_newyears_tree)
newprevsummer_temps_tree <- summertemp
newprevsummer_temps <- c(newprevsummer_temps, newprevsummer_temps_tree)
newspring_temps_tree <- springtemp
newspring_temps <- c(newspring_temps, newspring_temps_tree)
newgdd_lastfrost_tree <- frostgdd/10
newgdd_lastfrost <- c(newgdd_lastfrost, newgdd_lastfrost_tree)
newtree_start_idxs <- c(newtree_start_idxs, idx)
idx <- idx + N_newyears_tree
newtree_end_idxs <- c(newtree_end_idxs, idx - 1)
}5 Posterior quantification
N <- length(years)
Nnew <- length(newyears)
data <- mget(c('N', 'N_trees', 'N_max_years',
'N_years', 'tree_start_idxs', 'tree_end_idxs',
'seed_counts', 'years',
'prevsummer_temps', 'spring_temps', 'gdd_lastfrost',
# for predictions
'Nnew', 'N_newtrees', 'N_max_newyears', 'newtree_stand_idxs',
'N_newyears', 'newtree_start_idxs', 'newtree_end_idxs','newyears',
'newprevsummer_temps', 'newgdd_lastfrost', 'newspring_temps'
))
# Posterior quantification
if(FALSE){
fit <- stan(file= file.path(wd, "stan", "paper_models", "model2_treelevel_zinb_missing_rescaled_summertempfull_springfrost_springtemp_constrainedslopes_wpred_breakdownpred_standlevel.stan"),
data=data, seed=5838299, chain = 4, cores = 4,
warmup=1000, iter=2024, refresh=100)
saveRDS(fit, file = file.path(wd, 'output', 'fit_25sept2025_fullmodel_constrainedslopes.rds'))
diagnostics <- util$extract_hmc_diagnostics(fit)
util$check_all_hmc_diagnostics(diagnostics)
samples <- util$extract_expectand_vals(fit)
base_samples <- util$filter_expectands(samples,
c('lambda1', 'theta1', 'psi1',
'lambda20', 'psi2',
'beta_lambda2_frost', 'beta_lambda2_spring',
'rho0', 'beta_nm_m', 'beta_m_m',
'tau_nm_m0', 'tau_m_m0'))
util$check_all_expectand_diagnostics(base_samples)
fit <- stan(file= file.path(wd, "stan", "paper_models", "model2_treelevel_zinb_missing_rescaled_summertemp_springfrost_springtemp_constrainedslopes_wpred.stan"),
data=data, seed=5838299, chain = 4, cores = 4,
warmup=1000, iter=2024, refresh=100)
saveRDS(fit, file = file.path(wd, 'output', 'fit_25sept2025_biologymodel_constrainedslopes.rds'))
diagnostics <- util$extract_hmc_diagnostics(fit)
util$check_all_hmc_diagnostics(diagnostics)
samples <- util$extract_expectand_vals(fit)
base_samples <- util$filter_expectands(samples,
c('lambda1', 'theta1', 'psi1',
'lambda20', 'psi2',
'beta_lambda2_frost', 'beta_lambda2_spring',
'rho0', 'beta_nm_m',
'tau_nm_m0', 'tau_m_m0'))
util$check_all_expectand_diagnostics(base_samples)
fit <- stan(file= file.path(wd, "stan", "paper_models", "model2_treelevel_zinb_missing_rescaled_summertemp_springfrost_springtemp_wpred_standlevel.stan"),
data=data, seed=5838299, chain = 4, cores = 4,
warmup=1000, iter=2024, refresh=100)
saveRDS(fit, file = file.path(wd, 'output', 'fit_25sept2025_biologymodel.rds'))
diagnostics <- util$extract_hmc_diagnostics(fit)
util$check_all_hmc_diagnostics(diagnostics)
samples <- util$extract_expectand_vals(fit)
base_samples <- util$filter_expectands(samples,
c('lambda1', 'theta1', 'psi1',
'lambda20', 'psi2',
'beta_lambda2_frost', 'beta_lambda2_spring',
'rho0', 'beta_nm_m',
'tau_nm_m0', 'tau_m_m0'))
util$check_all_expectand_diagnostics(base_samples)
}else{
fit_full_const <- readRDS(file.path(wd, 'output', 'fit_25sept2025_fullmodel_constrainedslopes.rds'))
fit_biol_const <- readRDS(file = file.path(wd, 'output', 'fit_25sept2025_biologymodel_constrainedslopes.rds'))
fit_biol <- readRDS(file = file.path(wd, 'output', 'fit_25sept2025_biologymodel.rds'))
}6 Retrodictive check
6.1 Across all trees
samples <- util$extract_expectand_vals(fit_biol)
# Retrodictive check
observed_idxs <- c()
for (t in 1:data$N_trees){
idxs <- tree_start_idxs[t]:tree_end_idxs[t]
observed_idxs_tree <- N_max_years*(t-1)+years[idxs]
observed_idxs <- c(observed_idxs, observed_idxs_tree)
}
par(mfrow=c(1, 1), mar = c(4,4,2,2))
names <- sapply(observed_idxs, function(n) paste0('seed_counts_pred[',n,']'))
util$plot_hist_quantiles(samples[names], 'seed_counts_pred', 0, 340, 20,
baseline_values=data$seed_counts,
xlab="Seed counts")Warning in check_bin_containment(bin_min, bin_max, collapsed_values,
"predictive value"): 95576 predictive values (1.6%) fell above the binning.
Warning in check_bin_containment(bin_min, bin_max, baseline_values, "observed
value"): 16 observed values (1.1%) fell above the binning.
6.2 For some individual trees
set.seed(123456)
par(mfrow=c(3, 3), mar = c(4,4,2,2))
for (t in sample(1:data$N_trees,9)) {
idxs_tree <-(1+N_max_years*(t-1)):(N_max_years*t)
observed_idxs_tree <- tree_start_idxs[t]:tree_end_idxs[t]
observed_flags <- years[tree_start_idxs[t]:tree_end_idxs[t]]
names <- sapply(idxs_tree,
function(n) paste0('seed_counts_pred[', n, ']'))
xlab="Year"
xticklabs=first_year:last_year
ylab="Seed Counts"
display_ylim=c(0, 400)
main=paste("Tree", uniq_tree_ids[t])
# Construct bins
N <- length(names)
bin_min <- 0.5
bin_max <- N + 0.5
bin_delta <- 1
breaks <- seq(bin_min, bin_max, bin_delta)
plot_config <- util$configure_bin_plotting(breaks)
plot_idxs <- plot_config[[1]]
plot_xs <- plot_config[[2]]
# Construct marginal quantiles
probs <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
calc <- function(n) {
util$ensemble_mcmc_quantile_est(samples[[names[n]]], probs)
}
quantiles <- sapply(1:N, calc)
plot_quantiles <- do.call(cbind, lapply(plot_idxs,
function(n) quantiles[1:9, n]))
delta <- 0.05 * (display_ylim[2] - display_ylim[1])
display_ylim[1] <- display_ylim[1] - delta
display_ylim[2] <- display_ylim[2] + delta
plot(1, type="n", main=main,
xlim=c(bin_min, bin_max), xlab=xlab, xaxt="n",
ylim=display_ylim, ylab=ylab)
axis(1, at=1:N, labels=xticklabs)
for(n in 1:N){
idplot <- c(n*2-1, n*2)
polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
c(plot_quantiles[1,idplot], rev(plot_quantiles[9,idplot])),
col = ifelse(n%in%observed_flags, util$c_light, 'grey90'), border = NA)
polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
c(plot_quantiles[2,idplot], rev(plot_quantiles[8,idplot])),
col = ifelse(n%in%observed_flags, util$c_light_highlight, 'grey80'), border = NA)
polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
c(plot_quantiles[3,idplot], rev(plot_quantiles[7,idplot])),
col = ifelse(n%in%observed_flags, util$c_mid, 'grey70'), border = NA)
polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
c(plot_quantiles[4,idplot], rev(plot_quantiles[6,idplot])),
col = ifelse(n%in%observed_flags, util$c_mid_highlight, 'grey60'), border = NA)
lines(plot_xs[idplot], plot_quantiles[5, idplot],
col= ifelse(n%in%observed_flags,util$c_dark, 'grey50'), lwd=2)
}
for(i in observed_idxs_tree) {
lines(c(years[i] - 0.5, years[i] + 0.5),
rep(seed_counts[i], 2),
col="white", lwd=4)
lines(c(years[i] - 0.5, years[i] + 0.5),
rep(data$seed_counts[i], 2),
col="black", lwd=2)
}
}7 Posterior inferences
First let’s look at the transition matrix parameters!
\rho_0 is the initial masting probability, \tau_0^{\text{M} \rightarrow \text{M}} is the probability of staying in a masting state, \tau_0^{\text{NM} \rightarrow \text{M}} is the probability of transition from non-masting to masting (at 15°C, a cold summer), modified by summer temperature with the slope \beta_\text{summer}^{\text{NM} \rightarrow \text{M}}.
Then look at the seed production parameters!
\lambda_\text{NM} is the mean production of seeds in a non-masting state, with an overdispersion (inverse of precision) \psi_\text{NM}. \theta_\text{NM} is the probability to observe a zero in a non-masting state (zero-inflation). \lambda_\text{NM} is the mean production of seeds in a masting state (at some baseline conditions…), increase or decrease by spring frost risk with \beta_\text{frost}^\text{M} and spring temperature with \beta_\text{spring}^\text{M}. \psi_\text{M} is the overdispersion parameter for masting.